In [1]:
from IPython.display import IFrame, display, HTML
import pandas as pd
import numpy as np
from jinja2 import Template
from bokeh.models import (
ColumnDataSource, Plot, Circle, Range1d,
LinearAxis, TapTool, HoverTool, Text,
SingleIntervalTicker,
)
from bokeh.models.actions import Callback
from bokeh.models.widgets import Slider
from bokeh.palettes import Spectral6
from bokeh.plotting import vplot, hplot
from bokeh.resources import Resources
from bokeh.embed import components
In [2]:
# Source spreadsheets, via http://www.gapminder.org/data/
population_url = "http://spreadsheets.google.com/pub?key=phAwcNAVuyj0XOoBL_n5tAQ&output=xls"
fertility_url = "http://spreadsheets.google.com/pub?key=phAwcNAVuyj0TAlJeCEzcGQ&output=xls"
life_expectancy_url = "http://spreadsheets.google.com/pub?key=tiAiXcrneZrUnnJ9dBU-PAw&output=xls"


def get_data(url, first_year=1964, last_year=2013):
    """Download a spreadsheet and keep only the years in [first_year, last_year].

    The sheet is read with its first column as the index. The rest of the
    notebook treats the columns of the returned frame as years.

    Parameters
    ----------
    url : str
        Location of the Excel sheet, passed straight to ``pd.read_excel``.
    first_year, last_year : int
        Inclusive bounds of the years to keep.

    Returns
    -------
    pandas.DataFrame
        Same orientation as the downloaded sheet, restricted to the
        requested years.
    """
    df = pd.read_excel(url, index_col=0)
    # unstack().unstack() effectively transposes the frame, so the year labels
    # become the index and can be filtered with a boolean mask.
    df = df.unstack().unstack()
    df = df[(df.index >= first_year) & (df.index <= last_year)]
    # Transpose back to the original orientation.
    df = df.unstack().unstack()
    return df


def load_cached(url, hdf_path):
    """Return the frame stored in ``hdf_path``, downloading it on a cache miss.

    The local HDF file (key ``'df'``) is tried first. If it cannot be opened,
    the data is fetched with ``get_data(url)`` and written to ``hdf_path`` so
    that later runs work offline.
    """
    try:
        return pd.read_hdf(hdf_path, key='df')
    except (IOError, OSError):
        # Cache file missing or unreadable: rebuild it from the source sheet.
        df = get_data(url)
        df.to_hdf(hdf_path, key='df')
        return df


fertility_df = load_cached(fertility_url, 'fertility_df.hdf')
life_expectancy_df = load_cached(life_expectancy_url, 'life_expectancy_df.hdf')
population_df = load_cached(population_url, 'population_df.hdf')
In [3]:
# Keep only the rows of fertility and population whose label also appears in
# the life-expectancy table.
# NOTE(review): life_expectancy_df itself is not filtered here, so labels that
# exist only in that table remain - confirm this is intended.
reference_countries = life_expectancy_df.index
fertility_df = fertility_df.drop(fertility_df.index.difference(reference_countries))
population_df = population_df.drop(population_df.index.difference(reference_countries))

# Marker size derived from population; anything smaller than min_size
# (including missing values) is set to min_size.
min_size = 3
population_df_size = np.sqrt(population_df / np.pi) / 200
population_df_size = population_df_size.where(population_df_size >= min_size, min_size)
In [12]:
# Dump the aligned tables to CSV files next to the notebook.
for csv_name, frame in (
    ('fertility.csv', fertility_df),
    ('population.csv', population_df),
    ('life_expectancy.csv', life_expectancy_df),
):
    frame.to_csv(csv_name)
Get the regions and color them
In [4]:
regions_url = "https://docs.google.com/spreadsheets/d/1OxmGUNWeADbPJkQxVPupSOK5MbAECdqThnvyPrwG5Os/pub?gid=1&output=xls"
regions_df = pd.read_excel(regions_url, index_col=0)
# Keep only the rows whose label also appears in the life-expectancy table.
regions_df = regions_df.drop(regions_df.index.difference(life_expectancy_df.index))

# Treat the region group as a categorical so that the list of distinct groups
# (also used later to draw the legend) has a stable order.
regions_df['Group'] = regions_df['Group'].astype('category')
cats = list(regions_df['Group'].cat.categories)


def get_color(r):
    """Return the Spectral6 colour for the region group of row ``r``.

    The colour is picked by the position of ``r.Group`` in ``cats``.
    NOTE(review): presumably there are at most six groups; a seventh would
    index past the end of Spectral6 - confirm against the regions sheet.
    """
    return Spectral6[cats.index(r.Group)]


regions_df['region_color'] = regions_df.apply(get_color, axis=1)
In [5]:
# Set up the data.
#
# One ColumnDataSource is built per year and stored in `sources` under the key
# '_<year>'. The whole dictionary is later handed to the slider callback as its
# args, so each source is available in the JS code under that name.
#
# `dictionary_of_sources` maps each year to the name of its source:
#     {<year>: '_<year>', ...}
# `js_source_array` is the same mapping written as a JS object literal with the
# names unquoted:
#     '{<year>: _<year>, ...}'
# When the callback runs, each _<year> identifier refers to the actual source
# object that was passed in as an arg.
sources = {}
years = list(fertility_df.columns)
region_color = regions_df['region_color']

for year in years:
    # `keys` supplies the column names, so the Series taken from the source
    # frames do not have to be renamed in place.
    new_df = pd.concat(
        [fertility_df[year], life_expectancy_df[year], population_df_size[year], region_color],
        axis=1,
        keys=['fertility', 'life', 'population', 'region_color'],
    )
    sources['_%s' % year] = ColumnDataSource(new_df)

dictionary_of_sources = {year: '_%s' % year for year in years}
# Build the JS literal explicitly rather than stripping quotes from the repr of
# a Python dict, so the result does not depend on how the year labels repr.
js_source_array = '{%s}' % ', '.join('%s: _%s' % (year, year) for year in years)
# Set up the plot: fixed axis ranges, empty title, no outline and no toolbar.
xdr = Range1d(1, 9)
ydr = Range1d(20, 100)
plot = Plot(
    title="",
    x_range=xdr, y_range=ydr,
    plot_width=800, plot_height=400,
    outline_line_color=None,
    toolbar_location=None,
)

# Styling shared by both axes.
AXIS_FORMATS = {
    'minor_tick_in': None,
    'minor_tick_out': None,
    'major_tick_in': None,
    'major_label_text_font_size': "10pt",
    'major_label_text_font_style': "normal",
    'axis_label_text_font_size': "10pt",
    'axis_line_color': '#AAAAAA',
    'major_tick_line_color': '#AAAAAA',
    'major_label_text_color': '#666666',
    'major_tick_line_cap': "round",
    'axis_line_cap': "round",
    'axis_line_width': 1,
    'major_tick_line_width': 1,
}

# NOTE(review): the ticker is passed positionally, exactly as before; confirm
# the installed Bokeh accepts this form rather than requiring ticker=...
xaxis = LinearAxis(SingleIntervalTicker(interval=1), axis_label="Live births per woman", **AXIS_FORMATS)
yaxis = LinearAxis(SingleIntervalTicker(interval=20), axis_label="Average life expectancy (years)", **AXIS_FORMATS)
for axis, side in ((xaxis, 'below'), (yaxis, 'left')):
    plot.add_layout(axis, side)
# Year label in the background. It is added before the circles, presumably so
# that it renders behind them.
first_year = years[0]
text_source = ColumnDataSource(dict(year=['%s' % first_year]))
text = Text(x=2, y=35, text='year', text_font_size='150pt', text_color='#EEEEEE')
plot.add_glyph(text_source, text)

# One circle per row of the first year's source; the slider callback later
# replaces the data held by `renderer_source`.
renderer_source = sources['_%s' % first_year]
circle_style = dict(
    fill_color='region_color', fill_alpha=0.8,
    line_color='#7c7e71', line_width=0.5, line_alpha=0.5,
)
circle_glyph = Circle(x='fertility', y='life', size='population', **circle_style)
circle_renderer = plot.add_glyph(renderer_source, circle_glyph)

# Hover is restricted to the circle renderer so other plot elements are ignored.
tooltips = "@index"
hover_tool = HoverTool(tooltips=tooltips, renderers=[circle_renderer])
plot.add_tools(hover_tool)
# Add the slider.
#
# The JS callback reads the slider value, looks up the source for that year in
# the object literal substituted for %s, copies its data into the source that
# drives the circles, and updates the background year label.
code = """
var year = slider.get('value'),
sources = %s,
new_source_data = sources[year].get('data');
renderer_source.set('data', new_source_data);
renderer_source.trigger('change');
text_source.set('data', {'year': [String(year)]});
text_source.trigger('change');
""" % js_source_array

callback = Callback(args=sources, code=code)
# Start the slider on the first year: that is the year renderer_source and
# text_source initially show, and it lies inside [start, end] (value=1 did not).
slider = Slider(start=years[0], end=years[-1], value=years[0], step=1, title="Year", callback=callback)
# The callback needs the slider and the two live sources in addition to the
# per-year sources; they are attached after the slider exists because the
# slider itself needs the callback at construction time.
callback.args["slider"] = slider
callback.args["renderer_source"] = renderer_source
callback.args["text_source"] = text_source
# Hand-built legend: one text label plus a coloured circle per region group,
# stacked downwards from (text_x, text_y) in steps of 5 data units.
text_x = 7
text_y = 95
text_properties = dict(x=text_x, text_font_size='10pt', text_color='#666666')
circle_properties = dict(size=10, line_color=None, fill_alpha=0.8)
for color_index, region_name in enumerate(cats):
    legend_label = Text(y=text_y, text=[region_name], **text_properties)
    legend_swatch = Circle(
        x=text_x - 0.1, y=text_y + 2,
        fill_color=Spectral6[color_index],
        **circle_properties
    )
    plot.add_glyph(legend_label)
    plot.add_glyph(legend_swatch)
    text_y -= 5

# Stick the plot and the slider together, slider underneath the plot.
slider_row = hplot(slider)
layout = vplot(plot, slider_row)
In [6]:
# Render the layout into the page template and show the result inline.
with open('my_template.html', 'r') as template_file:
    template = Template(template_file.read())

script, div = components(layout)
template_context = dict(
    title="Bokeh - Gapminder demo",
    plot_script=script,
    plot_div=div,
)
html = template.render(**template_context)
display(HTML(html))
In [7]:
# Save the rendered page to gapminder.html.
with open('gapminder.html', 'w') as html_file:
    html_file.write(html)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: